In [1]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import chart_studio.plotly as py
from IPython.display import IFrame
from datetime import datetime
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode()
%matplotlib inline
In [2]:
confirmed_cases_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
deaths_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
cured_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
In [3]:
confirmed_cases = pd.read_csv(confirmed_cases_path)
confirmed_cases.head()
Out[3]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 6/29/20 6/30/20 7/1/20 7/2/20 7/3/20 7/4/20 7/5/20 7/6/20 7/7/20 7/8/20
0 NaN Afghanistan 33.0000 65.0000 0 0 0 0 0 0 ... 31238 31517 31836 32022 32324 32672 32951 33190 33384 33594
1 NaN Albania 41.1533 20.1683 0 0 0 0 0 0 ... 2466 2535 2580 2662 2752 2819 2893 2964 3038 3106
2 NaN Algeria 28.0339 1.6596 0 0 0 0 0 0 ... 13571 13907 14272 14657 15070 15500 15941 16404 16879 17348
3 NaN Andorra 42.5063 1.5218 0 0 0 0 0 0 ... 855 855 855 855 855 855 855 855 855 855
4 NaN Angola -11.2027 17.8739 0 0 0 0 0 0 ... 276 284 291 315 328 346 346 346 386 386

5 rows × 173 columns

In [4]:
deaths_data = pd.read_csv(deaths_path)
deaths_data.head()
Out[4]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 6/29/20 6/30/20 7/1/20 7/2/20 7/3/20 7/4/20 7/5/20 7/6/20 7/7/20 7/8/20
0 NaN Afghanistan 33.0000 65.0000 0 0 0 0 0 0 ... 733 746 774 807 819 826 864 898 920 936
1 NaN Albania 41.1533 20.1683 0 0 0 0 0 0 ... 58 62 65 69 72 74 76 79 81 83
2 NaN Algeria 28.0339 1.6596 0 0 0 0 0 0 ... 905 912 920 928 937 946 952 959 968 978
3 NaN Andorra 42.5063 1.5218 0 0 0 0 0 0 ... 52 52 52 52 52 52 52 52 52 52
4 NaN Angola -11.2027 17.8739 0 0 0 0 0 0 ... 11 13 15 17 18 19 19 19 21 21

5 rows × 173 columns

In [5]:
recovered_cases = pd.read_csv(cured_path)
recovered_cases.head()
Out[5]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 6/29/20 6/30/20 7/1/20 7/2/20 7/3/20 7/4/20 7/5/20 7/6/20 7/7/20 7/8/20
0 NaN Afghanistan 33.0000 65.0000 0 0 0 0 0 0 ... 13934 14131 15651 16041 17331 19164 19366 20103 20179 20700
1 NaN Albania 41.1533 20.1683 0 0 0 0 0 0 ... 1438 1459 1516 1559 1592 1637 1657 1702 1744 1791
2 NaN Algeria 28.0339 1.6596 0 0 0 0 0 0 ... 9674 9897 10040 10342 10832 11181 11492 11884 12094 12329
3 NaN Andorra 42.5063 1.5218 0 0 0 0 0 0 ... 799 799 799 800 800 800 800 800 800 802
4 NaN Angola -11.2027 17.8739 0 0 0 0 0 0 ... 93 93 97 97 107 108 108 108 117 117

5 rows × 173 columns

Let us see the cumulative growth of number of cases per each day over the entire world

In [6]:
days_columns = confirmed_cases.columns[4:]
In [7]:
world_cases_growth = confirmed_cases[days_columns].sum(axis = 0).reset_index()
world_cases_growth.columns = ['Date', 'Count']
In [8]:
def isweekend(date):
    return datetime.isoweekday(pd.to_datetime(date)) > 5
world_cases_growth['isweekend'] = world_cases_growth['Date'].apply(isweekend).astype(int)
In [10]:
plt.rcParams['figure.figsize'] = [20, 10]
plt.stem(world_cases_growth['Date'], world_cases_growth['Count'], '--ro')
plt.plot(world_cases_growth['Date'], world_cases_growth['Count'], '--bo')
plt.title("Spread of virus per each day")
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.yticks(world_cases_growth['Count'])
plt.show();
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: UserWarning:

In Matplotlib 3.3 individual lines on a stem plot will be added as a LineCollection instead of individual lines. This significantly improves the performance of a stem plot. To remove this warning and switch to the new behaviour, set the "use_line_collection" keyword argument to True.

Sudden spike can be seen from 12-Feb to 13-Feb almost 15000 cases were identified in just a single day . But most of them could be suspected cases.

Delta difference from day to day confirmed cases

In [11]:
world_cases_growth['delta_confirmed'] = world_cases_growth['Count'].sub(world_cases_growth['Count'].shift())
world_cases_growth.loc[0, 'delta_confirmed'] = world_cases_growth.loc[0, 'Count']
In [15]:
plt.rcParams['figure.figsize'] = [20,10]
plt.stem(world_cases_growth['Date'], world_cases_growth['delta_confirmed'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['delta_confirmed'], '--ro')
for i in range(world_cases_growth.shape[0]):
    plt.text(world_cases_growth.loc[i, 'Date'], world_cases_growth.loc[i, 'delta_confirmed']+2, 
             world_cases_growth.loc[i,'delta_confirmed'].astype(int))
plt.title("Delta new cases of to Covid-19 identified on each day")
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.show();
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: UserWarning:

In Matplotlib 3.3 individual lines on a stem plot will be added as a LineCollection instead of individual lines. This significantly improves the performance of a stem plot. To remove this warning and switch to the new behaviour, set the "use_line_collection" keyword argument to True.

Parallelly let us compare the active cases to # of deaths on each day

In [16]:
world_cases_growth['deaths'] = deaths_data[days_columns].sum(axis = 0).reset_index(drop = True)
In [17]:
plt.rcParams['figure.figsize'] = [20,10]
plt.stem(world_cases_growth['Date'], world_cases_growth['deaths'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['deaths'], '--ro')
for i in range(world_cases_growth.shape[0]):
    plt.text(world_cases_growth.loc[i, 'Date'], world_cases_growth.loc[i, 'deaths']+50, 
             world_cases_growth.loc[i,'deaths'])
plt.title("Growth of death toll due to Covid-19 per each day")
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.show()
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: UserWarning:

In Matplotlib 3.3 individual lines on a stem plot will be added as a LineCollection instead of individual lines. This significantly improves the performance of a stem plot. To remove this warning and switch to the new behaviour, set the "use_line_collection" keyword argument to True.

But the above representation is a cumulative sum of previous day count and newly added count, We can look at the delta change for every day.

In [18]:
world_cases_growth['delta_deaths'] = world_cases_growth['deaths'].sub(world_cases_growth['deaths'].shift())
world_cases_growth.loc[0, 'delta_deaths'] = world_cases_growth.loc[0, 'deaths']
In [19]:
plt.rcParams['figure.figsize'] = [20,10]
plt.stem(world_cases_growth['Date'], world_cases_growth['delta_deaths'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['delta_deaths'], '--ro')
for i in range(world_cases_growth.shape[0]):
    plt.text(world_cases_growth.loc[i, 'Date'], world_cases_growth.loc[i, 'delta_deaths']+2, 
             world_cases_growth.loc[i,'delta_deaths'].astype(int))
plt.title("# of deaths due to Covid-19 per each day")
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.show()
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: UserWarning:

In Matplotlib 3.3 individual lines on a stem plot will be added as a LineCollection instead of individual lines. This significantly improves the performance of a stem plot. To remove this warning and switch to the new behaviour, set the "use_line_collection" keyword argument to True.

Let us also see how many cases were cured on daily basis

In [20]:
world_cases_growth['recovered'] = recovered_cases[days_columns].sum(axis = 0).reset_index(drop = True)
In [21]:
plt.rcParams['figure.figsize'] = [20,10]
plt.stem(world_cases_growth['Date'], world_cases_growth['recovered'], '--yo')
plt.plot(world_cases_growth['Date'], world_cases_growth['recovered'], '--go')
for i in range(world_cases_growth.shape[0]):
    plt.text(world_cases_growth.loc[i, 'Date'], world_cases_growth.loc[i, 'recovered']+50, 
             world_cases_growth.loc[i,'recovered'])
plt.title("# of people that recovered from Covid-19 virus")
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.show()
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:2: UserWarning:

In Matplotlib 3.3 individual lines on a stem plot will be added as a LineCollection instead of individual lines. This significantly improves the performance of a stem plot. To remove this warning and switch to the new behaviour, set the "use_line_collection" keyword argument to True.

In [25]:
plt.rcParams['figure.figsize'] = [20, 10]
plt.figure(1)
plt.subplot(311)
plt.plot(world_cases_growth['Date'], world_cases_growth['Count'], '--bo', label = 'ACTIVE CASES')
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.legend()

plt.subplot(312)
plt.plot(world_cases_growth['Date'], world_cases_growth['deaths'], '--ro', label = 'DEATHS')
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.legend()

plt.subplot(313)
plt.plot(world_cases_growth['Date'], world_cases_growth['recovered'], '--go', label = 'CURED')

plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.legend()
plt.show()

Stacked Bar plot of confirmed, recovered and death counts on daily bases

In [27]:
p1 = plt.bar(world_cases_growth['Date'], world_cases_growth['Count'], color = 'yellow')
p2 = plt.bar(world_cases_growth['Date'], world_cases_growth['recovered'], color='g')
p3 = plt.bar(world_cases_growth['Date'], world_cases_growth['deaths'], color='r')
plt.xticks(world_cases_growth['Date'], rotation = 90)
plt.legend([p1[0], p2[0], p3[0]], ('Confirmed', 'Recovered', 'Deaths'))
plt.xlabel("Date")
plt.title("Stacked diagram of COVID-19 cases");
In [28]:
world_cases_growth['deaths'].values[-1]/world_cases_growth['Count'].values[-1]
Out[28]:
0.045631267917232765

Increase in the trend of people recovering is a very good sign and considering the growth of confirmed cases and # of deaths, mortality rate is still as low as 4.6%

Let us now see the growth of virus in each country

In [29]:
con_cases_growth = confirmed_cases.groupby('Country/Region')[days_columns].agg(np.sum).reset_index()
con_cases_growth.head()
Out[29]:
Country/Region 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 1/28/20 1/29/20 1/30/20 ... 6/29/20 6/30/20 7/1/20 7/2/20 7/3/20 7/4/20 7/5/20 7/6/20 7/7/20 7/8/20
0 Afghanistan 0 0 0 0 0 0 0 0 0 ... 31238 31517 31836 32022 32324 32672 32951 33190 33384 33594
1 Albania 0 0 0 0 0 0 0 0 0 ... 2466 2535 2580 2662 2752 2819 2893 2964 3038 3106
2 Algeria 0 0 0 0 0 0 0 0 0 ... 13571 13907 14272 14657 15070 15500 15941 16404 16879 17348
3 Andorra 0 0 0 0 0 0 0 0 0 ... 855 855 855 855 855 855 855 855 855 855
4 Angola 0 0 0 0 0 0 0 0 0 ... 276 284 291 315 328 346 346 346 386 386

5 rows × 170 columns

As per the latest data lets see what is the severity of cases in each country

In [31]:
import requests
html_response = requests.get("https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports?_pjax=%23js-repo-pjax-container")
In [39]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_response.content, "html.parser")
In [43]:
a_tags = list(map(lambda x : x.get('href'), soup.findAll(name = 'a', attrs={'class' : 'js-navigation-open link-gray-dark'})))
In [45]:
repo_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/"+os.path.basename(a_tags[-2])
repo_path
Out[45]:
'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-08-2020.csv'
In [46]:
# daily_data = "csse_covid_19_data\\csse_covid_19_daily_reports"
# daily_data_dir = os.path.join(os.path.dirname(os.getcwd()), daily_data)
# daily_data_files = glob.glob(daily_data_dir+'/*.csv')[-1]
daily_data_files = repo_path
In [47]:
latest_data = pd.read_csv(daily_data_files)
# latest_data = latest_data.groupby(['Country_Region']).agg([])
latest_data.head()
Out[47]:
FIPS Admin2 Province_State Country_Region Last_Update Lat Long_ Confirmed Deaths Recovered Active Combined_Key Incidence_Rate Case-Fatality_Ratio
0 45001.0 Abbeville South Carolina US 2020-07-09 04:34:23 34.223334 -82.461707 137 1 0 136.0 Abbeville, South Carolina, US 558.568109 0.729927
1 22001.0 Acadia Louisiana US 2020-07-09 04:34:23 30.295065 -92.414197 1094 44 0 1050.0 Acadia, Louisiana, US 1763.236361 4.021938
2 51001.0 Accomack Virginia US 2020-07-09 04:34:23 37.767072 -75.632346 1042 14 0 1028.0 Accomack, Virginia, US 3224.408962 1.343570
3 16001.0 Ada Idaho US 2020-07-09 04:34:23 43.452658 -116.241552 3431 23 0 3408.0 Ada, Idaho, US 712.436175 0.670358
4 19001.0 Adair Iowa US 2020-07-09 04:34:23 41.330756 -94.471059 17 0 0 17.0 Adair, Iowa, US 237.695749 0.000000
In [48]:
latest_data['Province_State'] = latest_data['Province_State'].fillna(latest_data['Country_Region'])
latest_data['Active'] = latest_data['Confirmed'] - (latest_data[['Deaths', 'Recovered']].sum(axis = 1))
In [49]:
agg_latest_data = latest_data.groupby('Country_Region')['Confirmed', 'Deaths', 'Recovered', 'Active'].agg([np.sum]).reset_index()
agg_latest_data.columns = agg_latest_data.columns.droplevel(1) 
agg_latest_data.head()
Out[49]:
Country_Region Confirmed Deaths Recovered Active
0 Afghanistan 33594 936 20700 11958
1 Albania 3106 83 1791 1232
2 Algeria 17348 978 12329 4041
3 Andorra 855 52 802 1
4 Angola 386 21 117 248
In [50]:
agg_latest_data = agg_latest_data.loc[(agg_latest_data[['Confirmed', 'Deaths', 'Recovered']] > 0).all(axis=1), :].reset_index(drop = True)
In [51]:
manual_dict = {'mainland china' : 'CHN', 'north macedonia' : 'MKD', 'palestine' : 'PSE', 
               'saint barthelemy' : 'FRA', 'south korea' : 'KOR', 'st. martin' : 'PYC', 
               'uk' : 'GBR', 'us' : 'USA', 'vatican city': 'ITA'}
def get_country_code(series):
    df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
    df2 = pd.read_csv('countryCodes.csv', sep = '\t')
    country_code_dict = {row[1][0].lower() : row[1][2] for row in df.iterrows()}
    added_dict = {row[1][0].lower() : row[1][2] for row in df2.iterrows()}
#     print(country_code_dict)
    res = []
    for key in series:
        if key.lower() in country_code_dict:
            res.append(country_code_dict.get(key.lower(), None))
        elif key.lower() in added_dict:
            res.append(added_dict.get(key.lower(), None))
        else:
            res.append(manual_dict.get(key.lower(), None))
    return res
In [52]:
agg_latest_data['Code'] = get_country_code(agg_latest_data['Country_Region'])
In [53]:
from scipy.stats import rankdata
In [54]:
fig = px.scatter_geo(agg_latest_data, locations='Code', color=rankdata(agg_latest_data['Active'])/agg_latest_data.shape[0], 
                     size = np.log(agg_latest_data['Active']+7),
                     hover_data = ['Country_Region', 'Active'], 
                     projection="natural earth", color_continuous_scale = 'YlOrRd',
                   title="Count of Active COVID-19 cases in each country")
fig.update_geos(resolution=110,
    showcountries=True, showcoastlines=False,)
# fig.show()
# fig.write_html("images/worldplot.html")
iplot(fig, filename='images/worldplot')
In [55]:
fig = px.choropleth(agg_latest_data, locations='Code', color=rankdata(agg_latest_data['Active'])/agg_latest_data.shape[0], 
                     hover_data = ['Country_Region', 'Active'], 
                     projection="natural earth", color_continuous_scale = 'YlOrRd',
                   title="Count of Active COVID-19 cases in each country")
fig.update_geos(resolution=110,
    showcountries=True, showcoastlines=False,)
# fig.show()
# fig.write_html("images/worldplot.html")
iplot(fig, filename='images/worldplot')

On every day new people get affected, a part of affected people recover and some people die. Let us see what is the count of active cases in each day.

Province/State wise distribution of Active COVID-19 cases in Each country

In [56]:
latest_data.head()
Out[56]:
FIPS Admin2 Province_State Country_Region Last_Update Lat Long_ Confirmed Deaths Recovered Active Combined_Key Incidence_Rate Case-Fatality_Ratio
0 45001.0 Abbeville South Carolina US 2020-07-09 04:34:23 34.223334 -82.461707 137 1 0 136 Abbeville, South Carolina, US 558.568109 0.729927
1 22001.0 Acadia Louisiana US 2020-07-09 04:34:23 30.295065 -92.414197 1094 44 0 1050 Acadia, Louisiana, US 1763.236361 4.021938
2 51001.0 Accomack Virginia US 2020-07-09 04:34:23 37.767072 -75.632346 1042 14 0 1028 Accomack, Virginia, US 3224.408962 1.343570
3 16001.0 Ada Idaho US 2020-07-09 04:34:23 43.452658 -116.241552 3431 23 0 3408 Ada, Idaho, US 712.436175 0.670358
4 19001.0 Adair Iowa US 2020-07-09 04:34:23 41.330756 -94.471059 17 0 0 17 Adair, Iowa, US 237.695749 0.000000
In [57]:
fig = px.treemap(latest_data, path = ['Country_Region', 'Province_State'], values = np.log(latest_data['Active']+7),
                hover_data=['Active'], title = "Province/State wise distribution of Active COVID-19 cases in Each country")
fig.update_layout(width=2000, 
                  height=1500)
iplot(fig, filename='images/treemap_active.html')
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: RuntimeWarning:

invalid value encountered in log

active = confirmed - (recovered + dead)

In [58]:
world_cases_growth['active_cases'] = world_cases_growth['Count'] - (world_cases_growth[['recovered', 'deaths']].sum(axis = 1))
In [59]:
fig = px.scatter(world_cases_growth, x = 'Date', y = 'active_cases', title='Daily count of active cases of COVID 19 through out the world')
# fig.show()
iplot(fig, filename='active_cases')

The number of Active cases have almost doubled with in the span of one week, 55K on 12-Mar and the number went past 120k on 18-Mar

Let us have a look at the list of countries in which Active cases grew. For now we will concentrate on a selected list of countries which have been appearing a lot in News.
In [81]:
confirmed_cases
Out[81]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 6/29/20 6/30/20 7/1/20 7/2/20 7/3/20 7/4/20 7/5/20 7/6/20 7/7/20 7/8/20
0 NaN Afghanistan 33.000000 65.000000 0 0 0 0 0 0 ... 31238 31517 31836 32022 32324 32672 32951 33190 33384 33594
1 NaN Albania 41.153300 20.168300 0 0 0 0 0 0 ... 2466 2535 2580 2662 2752 2819 2893 2964 3038 3106
2 NaN Algeria 28.033900 1.659600 0 0 0 0 0 0 ... 13571 13907 14272 14657 15070 15500 15941 16404 16879 17348
3 NaN Andorra 42.506300 1.521800 0 0 0 0 0 0 ... 855 855 855 855 855 855 855 855 855 855
4 NaN Angola -11.202700 17.873900 0 0 0 0 0 0 ... 276 284 291 315 328 346 346 346 386 386
5 NaN Antigua and Barbuda 17.060800 -61.796400 0 0 0 0 0 0 ... 69 69 69 69 68 68 68 70 70 70
6 NaN Argentina -38.416100 -63.616700 0 0 0 0 0 0 ... 62268 64530 67197 69941 72786 75376 77815 80447 83426 87030
7 NaN Armenia 40.069100 45.038200 0 0 0 0 0 0 ... 25127 25542 26065 26658 27320 27900 28606 28936 29285 29820
8 Australian Capital Territory Australia -35.473500 149.012400 0 0 0 0 0 0 ... 108 108 108 108 108 108 108 108 111 112
9 New South Wales Australia -33.868800 151.209300 0 0 0 0 3 4 ... 3189 3203 3211 3211 3405 3419 3429 3433 3440 3453
10 Northern Territory Australia -12.463400 130.845600 0 0 0 0 0 0 ... 29 29 30 30 30 30 30 30 30 30
11 Queensland Australia -28.016700 153.400000 0 0 0 0 0 0 ... 1067 1067 1067 1067 1067 1067 1067 1068 1068 1068
12 South Australia Australia -34.928500 138.600700 0 0 0 0 0 0 ... 443 443 443 443 443 443 443 443 443 443
13 Tasmania Australia -41.454500 145.970700 0 0 0 0 0 0 ... 228 228 228 228 228 228 228 228 228 228
14 Victoria Australia -37.813600 144.963100 0 0 0 0 1 1 ... 2159 2231 2303 2368 2368 2536 2660 2824 2942 3098
15 Western Australia Australia -31.950500 115.860500 0 0 0 0 0 0 ... 611 611 611 611 611 612 618 621 624 624
16 NaN Austria 47.516200 14.550100 0 0 0 0 0 0 ... 17723 17766 17873 17941 18050 18165 18280 18365 18421 18513
17 NaN Azerbaijan 40.143100 47.576900 0 0 0 0 0 0 ... 16968 17524 18112 18684 19267 19801 20324 20837 21374 21916
18 NaN Bahamas 25.034300 -77.396300 0 0 0 0 0 0 ... 104 104 104 104 104 104 104 104 104 106
19 NaN Bahrain 26.027500 50.550000 0 0 0 0 0 0 ... 26239 26758 27414 27837 28410 28857 29367 29821 30321 30931
20 NaN Bangladesh 23.685000 90.356300 0 0 0 0 0 0 ... 141801 145483 149258 153277 156391 159679 162417 165618 168645 172134
21 NaN Barbados 13.193900 -59.543200 0 0 0 0 0 0 ... 97 97 97 97 97 97 98 98 98 98
22 NaN Belarus 53.709800 27.953400 0 0 0 0 0 0 ... 61790 62118 62424 62698 62997 63270 63554 63804 64003 64224
23 NaN Belgium 50.833300 4.000000 0 0 0 0 0 0 ... 61361 61427 61509 61598 61727 61838 62016 62058 62058 62123
24 NaN Benin 9.307700 2.315800 0 0 0 0 0 0 ... 1187 1199 1199 1199 1199 1199 1199 1199 1199 1199
25 NaN Bhutan 27.514200 90.433600 0 0 0 0 0 0 ... 77 77 77 77 77 78 80 80 80 80
26 NaN Bolivia -16.290200 -63.588700 0 0 0 0 0 0 ... 32125 33219 34227 35528 36818 38071 39297 40509 41545 42984
27 NaN Bosnia and Herzegovina 43.915900 17.679100 0 0 0 0 0 0 ... 4325 4453 4606 4788 4962 4962 4962 5458 5621 5869
28 NaN Brazil -14.235000 -51.925300 0 0 0 0 0 0 ... 1368195 1402041 1448753 1496858 1539081 1577004 1603055 1623284 1668589 1713160
29 NaN Brunei 4.535300 114.727700 0 0 0 0 0 0 ... 141 141 141 141 141 141 141 141 141 141
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
236 NaN Timor-Leste -8.874217 125.727539 0 0 0 0 0 0 ... 24 24 24 24 24 24 24 24 24 24
237 NaN Belize 13.193900 -59.543200 0 0 0 0 0 0 ... 24 24 28 28 30 30 30 30 30 30
238 NaN Laos 19.856270 102.495496 0 0 0 0 0 0 ... 19 19 19 19 19 19 19 19 19 19
239 NaN Libya 26.335100 17.228331 0 0 0 0 0 0 ... 802 824 874 891 918 989 1046 1117 1182 1268
240 NaN West Bank and Gaza 31.952200 35.233200 0 0 0 0 0 0 ... 2185 2428 2758 3080 3334 3835 4277 4341 4647 5029
241 NaN Guinea-Bissau 11.803700 -15.180400 0 0 0 0 0 0 ... 1654 1654 1654 1654 1765 1765 1765 1790 1790 1790
242 NaN Mali 17.570692 -3.996166 0 0 0 0 0 0 ... 2173 2181 2202 2260 2285 2303 2330 2331 2348 2358
243 NaN Saint Kitts and Nevis 17.357822 -62.782998 0 0 0 0 0 0 ... 15 15 15 15 15 16 16 16 16 16
244 Northwest Territories Canada 64.825500 -124.845700 0 0 0 0 0 0 ... 5 5 5 5 5 5 5 5 5 5
245 Yukon Canada 64.282300 -135.000000 0 0 0 0 0 0 ... 11 11 11 11 11 11 11 11 11 11
246 NaN Kosovo 42.602636 20.902977 0 0 0 0 0 0 ... 2677 2878 2991 3064 3064 3064 3356 3508 3703 3886
247 NaN Burma 21.916200 95.956000 0 0 0 0 0 0 ... 299 299 303 304 306 313 313 316 316 317
248 Anguilla United Kingdom 18.220600 -63.068600 0 0 0 0 0 0 ... 3 3 3 3 3 3 3 3 3 3
249 British Virgin Islands United Kingdom 18.420700 -64.640000 0 0 0 0 0 0 ... 8 8 8 8 8 8 8 8 8 8
250 Turks and Caicos Islands United Kingdom 21.694000 -71.797900 0 0 0 0 0 0 ... 41 41 42 44 44 45 47 48 49 55
251 NaN MS Zaandam 0.000000 0.000000 0 0 0 0 0 0 ... 9 9 9 9 9 9 9 9 9 9
252 NaN Botswana -22.328500 24.684900 0 0 0 0 0 0 ... 175 227 227 227 277 277 277 314 314 314
253 NaN Burundi -3.373100 29.918900 0 0 0 0 0 0 ... 170 170 170 170 191 191 191 191 191 191
254 NaN Sierra Leone 8.460555 -11.779889 0 0 0 0 0 0 ... 1450 1462 1498 1518 1524 1533 1542 1547 1572 1584
255 Bonaire, Sint Eustatius and Saba Netherlands 12.178400 -68.238500 0 0 0 0 0 0 ... 7 7 7 7 7 7 7 7 7 7
256 NaN Malawi -13.254308 34.301525 0 0 0 0 0 0 ... 1152 1224 1265 1342 1498 1613 1613 1742 1818 1864
257 Falkland Islands (Malvinas) United Kingdom -51.796300 -59.523600 0 0 0 0 0 0 ... 13 13 13 13 13 13 13 13 13 13
258 Saint Pierre and Miquelon France 46.885200 -56.315900 0 0 0 0 0 0 ... 1 1 1 1 1 1 1 1 1 1
259 NaN South Sudan 6.877000 31.307000 0 0 0 0 0 0 ... 1989 2007 2021 2021 2021 2021 2021 2021 2021 2021
260 NaN Western Sahara 24.215500 -12.885800 0 0 0 0 0 0 ... 10 10 10 10 10 10 10 10 10 10
261 NaN Sao Tome and Principe 0.186360 6.613081 0 0 0 0 0 0 ... 713 714 715 717 719 719 720 721 724 724
262 NaN Yemen 15.552727 48.516388 0 0 0 0 0 0 ... 1128 1158 1190 1221 1240 1248 1265 1284 1297 1318
263 NaN Comoros -11.645500 43.333300 0 0 0 0 0 0 ... 272 303 303 303 309 309 311 311 311 313
264 NaN Tajikistan 38.861034 71.276093 0 0 0 0 0 0 ... 5900 5900 6005 6058 6058 6159 6213 6262 6315 6364
265 NaN Lesotho -29.609988 28.233608 0 0 0 0 0 0 ... 27 27 35 35 35 35 79 91 91 91

266 rows × 173 columns

In [82]:
selected_countries = ['China', 'Italy', 'France', 'Spain','Germany', 'Iran', 
                      'Korea, South', 'US', 'United Kingdom', 'Switzerland', 'India']
selected_dates = [1]+list(range(4, confirmed_cases.shape[1]))
# selected_dates = [1]+list(range(confirmed_cases.shape[1], confirmed_cases.shape[1]))
In [83]:
confirmed_cases_view = confirmed_cases.iloc[:, selected_dates]
confirmed_cases_view = confirmed_cases_view.loc[confirmed_cases_view['Country/Region'].isin(selected_countries)].groupby('Country/Region').agg([sum]).reset_index(drop=False)
confirmed_cases_view.columns = confirmed_cases_view.columns.droplevel(1)
confirmed_cases_view
Out[83]:
Country/Region 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 1/28/20 1/29/20 1/30/20 ... 6/29/20 6/30/20 7/1/20 7/2/20 7/3/20 7/4/20 7/5/20 7/6/20 7/7/20 7/8/20
0 China 548 643 920 1406 2075 2877 5509 6087 8141 ... 84780 84785 84816 84830 84838 84857 84871 84889 84917 84950
1 France 0 0 2 3 3 3 4 5 5 ... 201522 202063 202981 203640 204222 204222 204222 205597 206072 206072
2 Germany 0 0 0 0 0 1 4 4 4 ... 195042 195418 195893 196370 196780 197198 197523 198064 198343 198699
3 India 0 0 0 0 0 0 0 0 1 ... 566840 585481 604641 625544 648315 673165 697413 719664 742417 767296
4 Iran 0 0 0 0 0 0 0 0 0 ... 225205 227662 230211 232863 235429 237878 240438 243051 245688 248379
5 Italy 0 0 0 0 0 0 0 0 0 ... 240436 240578 240760 240961 241184 241419 241611 241819 241956 242149
6 Korea, South 1 1 2 2 3 4 4 4 4 ... 12800 12850 12904 12967 13030 13091 13137 13181 13244 13293
7 Spain 0 0 0 0 0 0 0 0 0 ... 248970 249271 249659 250103 250545 250545 250545 251789 252130 252513
8 Switzerland 0 0 0 0 0 0 0 0 0 ... 31652 31714 31851 31967 32101 32198 32268 32315 32369 32498
9 US 1 1 2 2 5 5 5 5 5 ... 2590668 2636414 2687588 2742049 2795361 2841241 2891124 2936077 2996098 3054699
10 United Kingdom 0 0 0 0 0 0 0 0 0 ... 284812 285216 285279 285285 285787 286412 286931 287290 287874 288511

11 rows × 170 columns

In [84]:
recovered_cases_view = recovered_cases.iloc[:, selected_dates]
recovered_cases_view = recovered_cases_view.loc[recovered_cases_view['Country/Region'].isin(selected_countries)].groupby('Country/Region').agg([sum]).reset_index(drop=False)
recovered_cases_view.columns = recovered_cases_view.columns.droplevel(1)
# recovered_cases_view
In [85]:
deaths_data_view = deaths_data.iloc[:, selected_dates]
deaths_data_view = deaths_data_view.loc[deaths_data_view['Country/Region'].isin(selected_countries)].groupby('Country/Region').agg([sum]).reset_index(drop=False)
deaths_data_view.columns = deaths_data_view.columns.droplevel(1)
# deaths_data_view
In [86]:
active_data_view = confirmed_cases_view.iloc[:, 1:] - (recovered_cases_view.iloc[:, 1:]+deaths_data_view.iloc[:, 1:])
# active_data_view
In [87]:
import plotly.graph_objects as go
In [89]:
fig = go.Figure()
for i in range(confirmed_cases_view.shape[0]):
    fig.add_trace(go.Scatter(x = confirmed_cases_view.columns[1:], y = active_data_view.iloc[i, :], 
                             name = confirmed_cases_view.iloc[i, 0]))
fig.update_layout(title = "Spread of COVID-19 in last 60 days in major Countries", height=800, width = 1000)
iplot(fig, filename='country_spread')

The graph shows how much a nation is prepared for an eidemic, clearly Italy was not expecting such massive outbreak. China on the otherhand showed the world that with proper care this Virus can be contained.

In [90]:
from ipywidgets import interact, widgets
from chart_studio.widgets import GraphWidget
In [91]:
@interact
def scatter_plot(country1 = countries, country2 = countries):
    fig = go.Figure()
    index1 = countries.index(country1)
    index2 = countries.index(country2)
    active_counts1 = active_data_view.iloc[index1, :]
    active_counts2 = active_data_view.iloc[index2, :]
    trace1 = fig.add_trace(go.Scatter(x = confirmed_cases_view.columns[1:], y = active_counts1, name=country1))
    trace2 = fig.add_trace(go.Scatter(x = confirmed_cases_view.columns[1:], y = active_counts2, name=country2))
    iplot(fig)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-91-3af1d430781a> in <module>()
      1 @interact
----> 2 def scatter_plot(country1 = countries, country2 = countries):
      3     fig = go.Figure()
      4     index1 = countries.index(country1)
      5     index2 = countries.index(country2)

NameError: name 'countries' is not defined